In [ ]:
%matplotlib nbagg
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import seaborn as sns

from planet4 import io, stats, markings
from planet4.catalog_production import ReleaseManager
In [ ]:
# Handle on catalog release 'v1.0b4'; later cells read the fan and blotch files through it.
rm = ReleaseManager('v1.0b4')
In [ ]:
# Database manager; its `dbname` is handed to `dd.read_hdf` further down.
db = io.DBManager()
In [ ]:
# Display the database manager's `n_image_names` attribute.
db.n_image_names
In [ ]:
# Display the `dbname` that the HDF read below uses as its source.
db.dbname
In [ ]:
# Load the release's blotch and fan catalog files; both are used as DataFrames below.
blotches = rm.read_blotch_file()
fans = rm.read_fan_file()
In [ ]:
import dask.dataframe as dd

# Lazily open the raw marking database and split it by marking type.
data = dd.read_hdf(db.dbname, 'df')
fan_input = data[data.marking == 'fan']
blotch_input = data[data.marking == 'blotch']
# A notebook cell only displays its LAST expression, so the fan shape used to be
# computed and then thrown away. Show both shapes together as (fan, blotch).
fan_input.compute().shape, blotch_input.compute().shape
In [ ]:
# Scale the size columns by each row's map_scale; the resulting `_m` columns
# are the ones plotted with "[m]" axis labels further down.
fans['distance_m'] = fans['distance'] * fans['map_scale']
for radius_col in ('radius_1', 'radius_2'):
    blotches[radius_col + '_m'] = blotches[radius_col] * blotches['map_scale']
In [ ]:
# Hard-coded number of input fan markings.
# NOTE(review): presumably the row count of `fan_input` computed above -- confirm,
# and consider deriving it from the data instead of pasting the number.
n_fan_in = 2_792_963
In [ ]:
# Number of fans in the catalog.
len(fans)
In [ ]:
# Ratio of catalog fans to the number of input fan markings.
len(fans) / n_fan_in
In [ ]:
# Number of blotches in the catalog.
len(blotches)
In [ ]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.stats` has been loaded (it only worked when another package had imported it).
import scipy.stats

# Percentile rank of a 100 m fan length within the catalog's fan lengths.
scipy.stats.percentileofscore(fans.distance_m, 100)
Cumulative histogram of fan lengths
In [ ]:
def add_percentage_line(ax, meters, column):
    """Mark on a cumulative histogram which fraction of `column` lies at `meters`.

    Draws a horizontal line at the fraction, a vertical line at `meters`, and a
    text label with the fraction (two decimals) where the two lines cross.

    Parameters
    ----------
    ax : axes-like object
        Must provide `axhline`, `axvline` and `text`.
    meters : number
        Value whose percentile rank within `column` is marked.
    column : array-like
        Sample the percentile is computed from.
    """
    # Imported here so the helper does not depend on `scipy.stats` having been
    # loaded as a side effect of some other import (`import scipy` alone is not enough).
    from scipy.stats import percentileofscore

    percent = percentileofscore(column, meters)
    # percentileofscore reports 0-100; the normalized histogram's y-axis runs 0-1.
    fraction = percent / 100
    ax.axhline(fraction)
    ax.axvline(meters)
    ax.text(meters, fraction, f"{fraction:0.2f}")
In [ ]:
# Close every open matplotlib figure before drawing new ones.
plt.close('all')
In [ ]:
# Cumulative, normalized histogram of fan lengths with the 100 m and 50 m fractions marked.
fig, ax = plt.subplots(figsize=(8, 4))
# `norm_hist=True` lets seaborn pick the matplotlib keyword itself (`normed` on old
# stacks, `density` on newer ones) instead of hard-coding `normed` in hist_kws,
# which newer matplotlib releases reject.
sns.distplot(
    fans.distance_m,
    bins=500,
    kde=False,
    norm_hist=True,
    hist_kws={'cumulative': True},
    axlabel='Fan length [m]',
    ax=ax,
)
ax.set_title("Cumulative normalized histogram for fan lengths")
ax.set_ylabel("Fraction of fans with given length")
add_percentage_line(ax, 100, fans.distance_m)
add_percentage_line(ax, 50, fans.distance_m)
General fan stats, in numbers
In [ ]:
# Summary statistics of the fan lengths in meters; the markdown text below quotes its 'mean' and '50%' entries.
fans.distance_m.describe()
In words, the mean length of fans is {{f"{fans.distance_m.describe()['mean']:.1f}"}} m, while the median is {{f"{fans.distance_m.describe()['50%']:.1f}"}} m.
In [ ]:
# Overlaid histograms of both blotch radii, using the unscaled columns.
cols = ['radius_1', 'radius_2']
plt.figure()
sns.distplot(
    blotches[cols],
    bins=np.arange(2.0, 50.),
    kde=False,
    label=cols,
    color=['r', 'g'],
)
plt.legend()
In [ ]:
# Overlaid histograms of both blotch radii, using the columns scaled to meters.
cols = ['radius_1_m', 'radius_2_m']
plt.figure()
sns.distplot(
    blotches[cols],
    bins=np.arange(2.0, 50.),
    kde=False,
    label=cols,
    color=['r', 'g'],
)
plt.legend()
In [ ]:
# Cumulative, normalized histogram of blotch radius_2 (meters) with the 30 m and 10 m
# fractions marked.
# NOTE(review): the axis labels used to say "radius_1" although every data reference in
# this cell is `radius_2_m`; the labels were aligned with the data. If radius_1 was the
# intended quantity, switch the column instead.
fig, ax = plt.subplots(figsize=(8, 4))
# `norm_hist=True` lets seaborn pick the matplotlib keyword itself (`normed` on old
# stacks, `density` on newer ones) instead of hard-coding `normed` in hist_kws.
sns.distplot(
    blotches.radius_2_m,
    bins=500,
    kde=False,
    norm_hist=True,
    hist_kws={'cumulative': True},
    axlabel='Blotch radius_2 [m]',
    ax=ax,
)
ax.set_title("Cumulative normalized histogram for blotch lengths")
ax.set_ylabel("Fraction of blotches with given radius_2")
add_percentage_line(ax, 30, blotches.radius_2_m)
add_percentage_line(ax, 10, blotches.radius_2_m)
In [ ]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee that
# `scipy.stats` has been loaded.
import scipy.stats

# Percentile rank of a 30 m radius within the blotch radius_2 values (meters).
scipy.stats.percentileofscore(blotches.radius_2_m, 30)
In [ ]:
# Close every open matplotlib figure before drawing new ones.
plt.close('all')
In [ ]:
# Fans longer than 350 m, shortest first, restricted to the columns needed to find them.
wanted_columns = 'distance_m distance obsid image_x image_y image_id x_tile y_tile'.split()
very_long_fans = fans.query('distance_m > 350')
very_long_fans[wanted_columns].sort_values(by='distance_m')
In [ ]:
# Unique user names appearing in the data of image id "APF0000dtk".
users1 = markings.ImageID("APF0000dtk").data.user_name.unique()
In [ ]:
# Unique user names appearing in the data of image id "de3".
users2 = markings.ImageID("de3").data.user_name.unique()
In [ ]:
# Users present in both images, in the order they appear in `users1`.
same = [user for user in users1 if user in users2]
In [ ]:
# Display the users found in both images.
same
In [ ]:
# Number of distinct users for the second image, for comparison with the overlap above.
len(users2)
In [ ]:
from planet4 import plotting
In [ ]:
# Call planet4's pipeline plot for image id 'q45', reading from this release's catalog (`rm.catalog`).
plotting.plot_image_id_pipeline('q45', datapath=rm.catalog, via_obsid=False, figsize=(12,8))
In [ ]:
from planet4 import stats
from planet4 import region_data
In [ ]:
# NOTE(review): presumably adds a `season` column in place to each frame (later cells
# query `season==2` / `season==3` and use `hue='season'`) -- confirm against planet4.stats.
stats.define_season_column(fans)
stats.define_season_column(blotches)
In [ ]:
# Attribute names looked up on `planet4.region_data` in the labelling loop below.
regions = ['Manhattan2', 'Giza', 'Inca', 'Ithaca']
In [ ]:
# Tag every fan and blotch whose obsid belongs to a region with that region's name
# in the `roi` column. 'Manhattan2' is stored under the label 'Manhattan'.
for reg in regions:
    roi = getattr(region_data, reg)()
    if reg == 'Manhattan2':
        reg = 'Manhattan'
    for marking in (fans, blotches):
        in_region = marking.obsid.isin(roi.all_obsids)
        marking.loc[in_region, 'roi'] = reg
In [ ]:
# Fans per region label; dropna=False also counts the fans that received no label.
fans.roi.value_counts(dropna=False)
In [ ]:
# Keep only the markings that were assigned a region label.
fans_rois = fans.dropna(subset=['roi'])
blotches_rois = blotches.dropna(subset=['roi'])
In [ ]:
# Fans per region label after filtering out the unlabelled rows.
fans_rois.roi.value_counts(dropna=False)
In [ ]:
# Median fan length (meters) over all season-2 fans.
season2_fans = fans.query('season==2')
season2_fans['distance_m'].median()
In [ ]:
# Median fan length (meters) over all season-3 fans.
season3_fans = fans.query('season==3')
season3_fans['distance_m'].median()
In [ ]:
import seaborn as sns
# Use seaborn's 'Set1' palette for the plots that follow.
sns.set_palette('Set1')
In [ ]:
# Display the region-labelled fans (the notebook shows a truncated view of the frame).
fans_rois
In [ ]:
def my_plot(x, **kwargs):
    """Draw a seaborn distplot of `x` with a KDE and clamp the x-axis to [-10, 150].

    Written to be passed to `FacetGrid.map`; extra keyword arguments are forwarded
    to `sns.distplot`. The limits are applied to the current matplotlib axes.
    """
    sns.distplot(x, kde=True, **kwargs)
    current_axes = plt.gca()
    current_axes.set_xlim(-10, 150)
In [ ]:
# One panel per region (`col="roi"`), one color per season (`hue='season'`), drawn with
# `my_plot` so every panel shares the same x-range.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height` -- update when upgrading.
g = sns.FacetGrid(fans_rois, col="roi", hue='season', size=2, aspect=1.1, legend_out=False)
# g.map(sns.distplot, "distance_m", kde=True);
g.map(my_plot, 'distance_m')
g.add_legend()
In [ ]:
# Same grid as the previous cell, but mapping `sns.distplot` directly, i.e. without the
# x-axis limits that `my_plot` applies.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height` -- update when upgrading.
g = sns.FacetGrid(fans_rois, col="roi", hue='season', size=2, aspect=1.1, legend_out=False)
g.map(sns.distplot, "distance_m", kde=True);
# g.map(my_plot, 'distance_m')
g.add_legend()
In [ ]:
# Print the median fan length (meters) per region and season, using the obsid lists
# stored under `season2` / `season3` on each region object.
for region in ('Manhattan2', 'Giza', 'Ithaca'):
    print(region)
    region_obj = getattr(region_data, region)
    for season_attr in ('season2', 'season3'):
        print(season_attr)
        in_season = fans.obsid.isin(getattr(region_obj, season_attr))
        print(fans[in_season].distance_m.median())
In [ ]:
import numpy as np
import scipy.integrate  # a bare `import scipy` does not guarantee the submodule is loaded
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: locate the median of a KDE curve by integrating the plotted density.

# SciPy 1.6 added `cumulative_trapezoid`; the older `cumtrapz` name was removed in 1.14.
# Use whichever this installation provides.
cumulative_trapezoid = (getattr(scipy.integrate, 'cumulative_trapezoid', None)
                        or scipy.integrate.cumtrapz)

plt.figure()
sns.set_palette("hls", 1)
# Private, seeded generator: the figure is reproducible across runs and the global
# NumPy random state is left untouched.
data = np.random.RandomState(42).randn(30)
p = sns.kdeplot(data, shade=True)
# (x, y) are the grid points and density values of the plotted KDE line.
x, y = p.get_lines()[0].get_data()
# Argument order is (y, x); initial=0 prepends a zero so `cdf` has the same length as `x`.
cdf = cumulative_trapezoid(y, x, initial=0)
# The grid point whose cumulative density is closest to 0.5 approximates the median.
nearest_05 = np.abs(cdf - 0.5).argmin()
x_median = x[nearest_05]
y_median = y[nearest_05]
plt.vlines(x_median, 0, y_median)
In [ ]:
import numpy as np
import scipy.integrate  # a bare `import scipy` does not guarantee the submodule is loaded
import seaborn as sns
import matplotlib.pyplot as plt

# Demo: locate the median of a KDE curve by integrating the plotted density.
# NOTE(review): this cell repeats the KDE-median demo of the preceding cell; consider
# keeping only one of them.

# SciPy 1.6 added `cumulative_trapezoid`; the older `cumtrapz` name was removed in 1.14.
# Use whichever this installation provides.
cumulative_trapezoid = (getattr(scipy.integrate, 'cumulative_trapezoid', None)
                        or scipy.integrate.cumtrapz)

plt.figure()
sns.set_palette("hls", 1)
# Private, seeded generator: the figure is reproducible across runs and the global
# NumPy random state is left untouched.
data = np.random.RandomState(42).randn(30)
p = sns.kdeplot(data, shade=True)
# (x, y) are the grid points and density values of the plotted KDE line.
x, y = p.get_lines()[0].get_data()
# Argument order is (y, x); initial=0 prepends a zero so `cdf` has the same length as `x`.
cdf = cumulative_trapezoid(y, x, initial=0)
# The grid point whose cumulative density is closest to 0.5 approximates the median.
nearest_05 = np.abs(cdf - 0.5).argmin()
x_median = x[nearest_05]
y_median = y[nearest_05]
plt.vlines(x_median, 0, y_median)
In [ ]:
# `x` holds the x-coordinates of the plotted KDE line (from `get_lines()[0].get_data()`),
# so this is the median of the evaluation grid, not of the random sample.
# NOTE(review): if the goal is to check `x_median`, `np.median(data)` is probably the
# intended comparison -- confirm.
np.median(x)
In [ ]:
# 50th percentile of the KDE grid `x`, i.e. the same quantity as `np.median(x)`.
# NOTE(review): like the median above, this describes the grid rather than the sample `data` -- confirm intent.
np.percentile(x, 50)
In [ ]: